# Chunk options: show code, evaluate it, suppress package startup noise.
knitr::opts_chunk$set(echo = TRUE, eval = TRUE, message = FALSE, warning = FALSE)
library(httr)
# Route all httr traffic through a local proxy (needed for Twitter API access).
set_config(use_proxy(url = "127.0.0.1", port = 15236))
library(tidyverse)
library(lubridate)
library(rtweet)
library(leaflet)
library(tigris)
library(plotly)
library(readxl)
library(ggthemes)
# FIX: removed a duplicate library(leaflet) call (it was loaded twice).
library(wordcloud)
# Cached tweet archive previously pulled with rtweet.
governor_tweets <- readRDS("governor_tweets.RDS")

# Governor metadata: one row per state, with party and Twitter handle.
governor <- read_csv("governors_twitter1.csv") %>%
  select(state, governor, party, twitter_handle, state_abbr)

# Keep only tweets that mention COVID-19 or election-related terms.
d <- filter(
  governor_tweets,
  grepl("COVID19|corona|virus|SocialDistancing|distancing|mask|ballot|elect|vote", text)
)

# Drop retweets, keep the columns we analyse, and attach governor info.
tweets <- d %>%
  filter(is_retweet == FALSE) %>%
  select(text, screen_name, hashtags, retweet_count) %>%
  left_join(governor, by = c("screen_name" = "twitter_handle"))
#### word cloud and sentiment
library(tm)
# Build a corpus with one document per tweet.
corpus0 <- VCorpus(VectorSource(tweets$text))
# Helper to drop URLs. It must run BEFORE the punctuation-stripping step
# below: once "://" has been removed, this regex can no longer match.
removeURL <- content_transformer(function(x) gsub("(f|ht)tp(s?)://\\S+", "", x, perl = TRUE))
# Remove fully capitalized words (acronyms, hashtag shouting).
corpus1 <- tm_map(corpus0, content_transformer(function(x) gsub("\\b[A-Z]+\\b", "", x)))
corpus1 <- tm_map(corpus1, removeURL)
# BUG FIX: this step previously restarted from corpus0, silently discarding
# the capitalized-word removal above; it now continues from corpus1.
corpus1 <- tm_map(corpus1, content_transformer(function(x) gsub("[^[:alnum:][:space:]]", "", x)))
# Standard tm cleaning pipeline. Step order matters: stop words are removed
# after lower-casing so capitalised stop words are caught as well.
clean_corpus <- function(corpus) {
  corpus %>%
    tm_map(removePunctuation) %>%
    tm_map(content_transformer(tolower)) %>%
    tm_map(removeWords, stopwords("en")) %>%
    tm_map(removeNumbers) %>%
    tm_map(stripWhitespace)
}
library("SnowballC")
# FIX: renamed the result from `clean_corpus` to `cleaned_corpus` so it no
# longer clobbers the clean_corpus() function defined above.
cleaned_corpus <- clean_corpus(corpus1)
# Document-term matrix: rows = tweets, columns = terms.
corpus1_dtm <- DocumentTermMatrix(cleaned_corpus)
corpus1_dtm <- as.matrix(corpus1_dtm)
adt <- as.data.frame(corpus1_dtm)
# Total frequency of each term across all tweets, most frequent first.
all <- data.frame(all = colSums(adt))
all$labels <- rownames(all)
all <- all[order(all[, 1], decreasing = TRUE), ]
# Top 20 terms; pin factor levels so ggplot keeps the frequency order
# instead of sorting alphabetically.
top20all <- all[1:20, ]
top20all$labels <- factor(top20all$labels, levels = top20all$labels)
# Horizontal bar chart of the 20 most frequent terms.
g_top20 <- ggplot(top20all, aes(x = labels, y = all)) +
  geom_bar(stat = "identity", fill = "coral") +
  ylim(c(0, 4000)) +
  coord_flip() +
  labs(
    title = "Top 20 Words in US Governors' Covid 19 and Election Related Tweets",
    x = "Words",
    y = "Frequency",
    caption = "Source:Twitter"
  ) +
  theme_pander() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.text.x = element_text(size = 12, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.title.x = element_blank(),
    plot.caption = element_text(size = 11, face = "bold"),
    panel.grid.major.x = element_blank()
  )
g_top20
# Opinion lexicons (one word per line): positive and negative word lists.
pos <- read.table("positive-words.txt", as.is = TRUE)
neg <- read.table("negative-words.txt", as.is = TRUE)

# Net tone of a single text: (pos - neg) / (pos + neg), in [-1, 1].
# Returns NaN when the text contains no lexicon words at all; such rows
# are dropped later by the Tone_of_Text > 0 / < 0 filters.
sentiment <- function(words) {
  # requireNamespace instead of require(): we call quanteda:: explicitly.
  stopifnot(requireNamespace("quanteda", quietly = TRUE))
  tok <- quanteda::tokens(words)
  pos.count <- sum(tok[[1]] %in% pos[, 1])
  neg.count <- sum(tok[[1]] %in% neg[, 1])
  (pos.count - neg.count) / (pos.count + neg.count)
}

# Score every tweet. FIX: vapply over the actual row count replaces the old
# hard-coded `for (i in 1:7267)` loop that grew an unallocated vector and
# broke whenever the number of tweets changed.
tweets["Tone_of_Text"] <- vapply(tweets$text, sentiment, numeric(1), USE.NAMES = FALSE)
# plot positive vs negative words by party
# Split the scored tweets by the governor's party affiliation.
d_tweets <- filter(tweets, grepl("D", party))
r_tweets <- filter(tweets, grepl("R", party))

# Democratic tweets with a positive vs. negative net tone.
d_pos_texts <- d_tweets %>%
  filter(Tone_of_Text > 0) %>%
  select(text)
d_neg_texts <- d_tweets %>%
  filter(Tone_of_Text < 0) %>%
  select(text)
# Two "documents": all positive vs. all negative Democratic tweets
# (c() on the two one-column data frames yields a 2-element list, which
# VectorSource turns into exactly two corpus documents).
com_texts <- c(d_pos_texts, d_neg_texts)
com_corpus <- VCorpus(VectorSource(com_texts))
com_corpus <- tm_map(com_corpus, content_transformer(function(x) gsub("\\b[A-Z]+\\b", "", x)))
com_corpus <- tm_map(com_corpus, content_transformer(function(x) gsub("[^[:alnum:][:space:]]", "", x)))
com_corpus <- tm_map(com_corpus, content_transformer(tolower))
com_corpus <- tm_map(com_corpus, removeNumbers)
com_corpus <- tm_map(com_corpus, removePunctuation)
com_corpus <- tm_map(com_corpus, removeWords, c(stopwords("en")))
com_corpus <- tm_map(com_corpus, stripWhitespace)
# Term-document matrix: one column per sentiment group.
# FIX: removed the DocumentTermMatrix (com_dtm / com_dtm_ma) that was also
# built here but never used anywhere.
com_tdm <- TermDocumentMatrix(com_corpus)
com_ma <- as.matrix(com_tdm)
colnames(com_ma) <- c("positive words", "negative words")
# Blue comparison cloud for Democratic governors.
comparison.cloud(com_ma, colors = c("steelblue", "lightsteelblue2"), title.size = 3,
                 scale = c(6, 2), max.words = 100, shape = "circle")
# Same comparison-cloud pipeline, now for Republican governors.
r_pos_texts <- r_tweets %>%
  filter(Tone_of_Text > 0) %>%
  select(text)
r_neg_texts <- r_tweets %>%
  filter(Tone_of_Text < 0) %>%
  select(text)
# Two "documents": all positive vs. all negative Republican tweets.
com_texts <- c(r_pos_texts, r_neg_texts)
com_corpus <- VCorpus(VectorSource(com_texts))
com_corpus <- tm_map(com_corpus, content_transformer(function(x) gsub("\\b[A-Z]+\\b", "", x)))
com_corpus <- tm_map(com_corpus, content_transformer(function(x) gsub("[^[:alnum:][:space:]]", "", x)))
com_corpus <- tm_map(com_corpus, content_transformer(tolower))
com_corpus <- tm_map(com_corpus, removeNumbers)
com_corpus <- tm_map(com_corpus, removePunctuation)
com_corpus <- tm_map(com_corpus, removeWords, c(stopwords("en")))
com_corpus <- tm_map(com_corpus, stripWhitespace)
# Term-document matrix: one column per sentiment group.
# FIX: removed the DocumentTermMatrix (com_dtm / com_dtm_ma) that was also
# built here but never used anywhere.
com_tdm <- TermDocumentMatrix(com_corpus)
com_ma <- as.matrix(com_tdm)
colnames(com_ma) <- c("positive words", "negative words")
# Red/pink comparison cloud for Republican governors.
comparison.cloud(com_ma, colors = c("coral", "lightpink"), title.size = 4,
                 scale = c(6, 2), max.words = 100, shape = "circle")
#### Relationship between Average Sentiment Score of Governors' Tweets about COVID-19 and Confirmed COVID Cases by State
# putting average sentiment to leaflet map
# NOTE(review): `allbystate` is built elsewhere; assumed to be a spatial
# data frame with state, avg_senti, total_cases and party columns — confirm.
# Pop-up HTML shown when a state polygon is clicked.
content <- paste("State:", allbystate$state, "<br/>",
                 "Avg sentiment score of governor tweets about COVID:", allbystate$avg_senti, "<br/>", "Total number of cases:", allbystate$total_cases, "<br/>",
                 "Governor party:", allbystate$party, "<br/>")
# Continuous colour scale over the average sentiment score.
# FIX: use <- for assignment (consistent with the rest of the script).
pal <- colorNumeric(palette = "Oranges", allbystate$avg_senti)
leafletmap2 <- leaflet() %>%
  addTiles() %>%
  addProviderTiles("Stamen.TonerLite") %>%
  setView(-98.1156, 38.4204, zoom = 4) %>%
  addPolygons(
    data = allbystate,
    fillColor = ~pal(avg_senti),   # fill encodes average sentiment
    # NOTE(review): border colour is taken from the party column; this only
    # works if party values are valid CSS colour names — verify.
    color = allbystate$party,
    fillOpacity = 0.7, weight = 2, smoothFactor = 0.2,
    popup = content,
    label = ~stringr::str_c(state, ' See pop-up for more info'),
    labelOptions = labelOptions(direction = 'auto'),
    highlightOptions = highlightOptions(color = allbystate$party, weight = 5,
                                        bringToFront = TRUE, sendToBack = TRUE)
  ) %>%
  addLegend("bottomright", pal = pal, values = allbystate$avg_senti,
            title = "Average sentiment score of governors' tweets about COVID",
            opacity = 1)
leafletmap2
knitr::include_graphics("1.png")